Time analysis tests


In [ ]:
%run "../Functions/6. Time analysis.ipynb"

In [ ]:
def _countPerDay(timestamps):
    """Return how many entries of `timestamps` fall on each calendar day.

    Every element is reduced to its `.date()`, the occurrences of each date
    are counted, and the counts are returned ordered by date.
    """
    return timestamps.map(lambda t: t.date()).value_counts().sort_index()


# Every row of rmdf1522, counted by the day of its 'userTime'.
perDayEvents = _countPerDay(rmdf1522['userTime'])

# Only rows whose 'type' is 'start' (presumably one per session -- confirm).
perDaySessions = _countPerDay(rmdf1522.loc[rmdf1522['type'] == 'start', 'userTime'])

# Earliest 'userTime' of each 'userId', counted by day.
# The groupby reduction is called directly instead of agg({...: np.min}):
# recent pandas versions warn when a NumPy callable is passed to agg.
perDayUsers = _countPerDay(rmdf1522.groupby('userId')['userTime'].min())

# Earliest QTimestamp within each localplayerguidkey group, counted by day.
perDaySurveys = _countPerDay(gform.groupby(localplayerguidkey)[QTimestamp].min())

In [ ]:
# Earliest timestamp of each data source, displayed side by side.
# Series.min() is vectorised and skips missing values, whereas the builtin
# min() iterates element by element and its result depends on where a NaT
# happens to sit in the column.
rmdf1522['userTime'].min(), gform[QTimestamp].min()

In [ ]:
# Overall calendar-date span covered by both data sources.
# The per-column extremes come from Series.min()/Series.max() (vectorised,
# missing values skipped); the builtin min()/max() then only compares the
# two resulting scalars before `.date()` drops the time of day.
minDate = min(rmdf1522['userTime'].min(), gform[QTimestamp].min()).date()
maxDate = max(rmdf1522['userTime'].max(), gform[QTimestamp].max()).date()
minDate, maxDate

In [ ]:
# Re-index the per-day survey counts on a gap-free daily range running from
# minDate to maxDate; days without any survey get a count of 0.
# NOTE(review): the result is bound to `dateIndex`, so that name ends up
# holding the re-indexed Series rather than the date range, and
# `perDaySurveys` itself is left unchanged -- confirm this is intended.
dateIndex = perDaySurveys.reindex(
    pd.date_range(minDate, maxDate),
    fill_value=0,
)

In [ ]:
# Plot the per-day survey counts with plotPerDay, which is not defined in
# this notebook section (presumably provided by the %run'd functions
# notebook -- confirm).
plotPerDay(perDaySurveys)

In [ ]:
# Cell-level walk-through of a per-day plot: the values that the
# commented-out signature below lists as parameters are bound as plain
# variables so each step can be run and inspected directly.
valuesPerDay = perDaySurveys
title = ''
loc = 2
startDate = None
endDate = None

#def plotPerDay(valuesPerDay, title='', loc=2, startDate=None, endDate=None):

dateIndex = valuesPerDay.index

# Default the plotted range to the span covered by the data.
# `is None` is the identity test recommended for None; `== None` would
# instead go through the operand's own equality operator.
if startDate is None:
    startDate = min(dateIndex)
if endDate is None:
    endDate = max(dateIndex)

# Re-index on a gap-free daily range so days without entries count as 0,
# and sort once here instead of before every plot call.
dateIndex = pd.date_range(startDate, endDate)
valuesPerDay = valuesPerDay.reindex(dateIndex, fill_value=0).sort_index()

# Left axis: raw per-day values.
ax1 = valuesPerDay.plot(
    rot=90,
    label='per day',
    legend=False,
    color='blue',
)
ax1.set_ylabel("per day")
h1, l1 = ax1.get_legend_handles_labels()

# Secondary (right) axis: running total of the same values.
ax2 = valuesPerDay.cumsum().plot(
    rot=90,
    label='cumulative',
    legend=False,
    secondary_y=True,
    color='red',
)
ax2.set_ylabel("cumulative")
h2, l2 = ax2.get_legend_handles_labels()

# Both plots were drawn with legend=False; build one legend from the
# handles and labels collected from the two axes.
plt.legend(h1 + h2, l1 + l2, loc=loc)
plt.title(title)
plt.show()

In [ ]:
# Cell-level walk-through of a second per-day plot variant that drives
# matplotlib axes directly (explicit figure, subplot and twin y-axis).
valuesPerDay = perDaySurveys
title = ''
loc = 2
startDate = None
endDate = None

#def plotPerDay2(valuesPerDay, title='', loc=0, startDate=None, endDate=None):
# NOTE(review): the commented-out signature above defaults loc to 0 while
# this cell sets loc = 2 -- confirm which one is intended.

dateIndex = valuesPerDay.index

# Default the plotted range to the span covered by the data.
# `is None` is the identity test recommended for None; `== None` would
# instead go through the operand's own equality operator.
if startDate is None:
    startDate = min(dateIndex)
if endDate is None:
    endDate = max(dateIndex)

# Re-index on a gap-free daily range so days without entries count as 0,
# and sort once here instead of at every use below.
dateIndex = pd.date_range(startDate, endDate)
valuesPerDay = valuesPerDay.reindex(dateIndex, fill_value=0).sort_index()

fig = plt.figure()
ax1 = fig.add_subplot(111)

# Left axis: raw per-day values.
ax1.plot(valuesPerDay.index, valuesPerDay, label='per day', color='blue')
# Tick rotation is applied before the twin axis is created.
plt.xticks(rotation='vertical')

# Right axis (shares x with ax1): running total of the same values.
ax2 = ax1.twinx()
ax2.plot(valuesPerDay.index, valuesPerDay.cumsum(), label='cumulative', color='red')

# Single legend combining the entries of both axes.
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc=loc)

ax1.set_ylabel("per day")
ax2.set_ylabel("cumulative")
plt.title(title)
plt.show()

In [ ]: